1  Processing HDF5 files

(ns scrapbook.data.hdf
  (:require [babashka.fs :as fs]
            [tech.v3.tensor :as tensor]
            [clojure.java.io :as io]
            [clojure.string :as string])
  (:import io.jhdf.HdfFile
           java.io.File))
;; Ask the compiler to warn whenever a Java interop call in this namespace
;; cannot be resolved statically and falls back to reflection.
(set! *warn-on-reflection* true)
true
(defn hdf5->tensors
  "Reads every child of the root group of the HDF5 file at `path` into a tensor.

  Returns a vector of maps, one per child, in the iteration order of the
  root group's child map:
    :key  - the child's key in that map (a string)
    :data - the child's data converted with `tensor/->tensor`

  The file is held open only for the duration of the call and is closed
  before returning, even when reading throws. Each child is treated as an
  `io.jhdf.api.Dataset`; a child that is not a dataset (for example a nested
  group) raises a ClassCastException."
  [path]
  ;; HdfFile owns an open file handle, so scope it with with-open. mapv is
  ;; eager, which guarantees all data is read before the file is closed.
  (with-open [hdf-file (HdfFile. ^File (io/file path))]
    (let [children ^java.util.Map (.getChildren hdf-file)]
      (mapv (fn [[key child]]
              ;; Hint the Dataset interface rather than one concrete
              ;; implementation class so any dataset layout is accepted.
              {:key  key
               :data (tensor/->tensor (.getData ^io.jhdf.api.Dataset child))})
            children))))
;; Entries read from the sample file; evaluated once when the namespace loads.
;; The path is relative, so it resolves against the process working directory.
(def tensors
  (hdf5->tensors "data/test.h5"))
;; Number of entries read from the file.
(count tensors)
20
;; Preview the first three entries. Keys are strings and come back in the
;; child map's iteration order, which here appears lexicographic ("0", "1",
;; "10") rather than numeric -- TODO confirm before relying on the order.
(take 3 tensors)
({:key "0", :data #tech.v3.tensor<int32>[600 800]
[[112 110 111 ... 114 118 124]
 [105 112 106 ... 115 120 119]
 [107 109 108 ... 117 123 115]
 ...
 [132 138 134 ... 109 107 109]
 [130 129 129 ... 115 111 103]
 [134 135 134 ... 110 111 110]]}
 {:key "1", :data #tech.v3.tensor<int32>[600 800]
[[124 120 121 ... 117 121 124]
 [118 123 117 ... 118 122 121]
 [120 123 117 ... 119 123 118]
 ...
 [133 139 136 ... 111 106 108]
 [133 132 131 ... 116 111 103]
 [136 137 135 ... 114 111 108]]}
 {:key "10", :data #tech.v3.tensor<int32>[600 800]
[[125 120 120 ... 116 121 127]
 [116 122 116 ... 117 121 119]
 [120 121 115 ... 120 123 118]
 ...
 [132 139 133 ... 112 107 108]
 [135 132 132 ... 115 112 104]
 [134 135 135 ... 114 112 108]]})